/*****************************************************************************
 *                                McPAT/CACTI
 *                      SOFTWARE LICENSE AGREEMENT
 *            Copyright 2012 Hewlett-Packard Development Company, L.P.
 *                          All Rights Reserved
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.

 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 ***************************************************************************/

#include "basic_circuit.h"
#include <assert.h>
#include <cmath>
#include <iostream>
#include "parameter.h"
using namespace cacti;

// Integer base-2 logarithm, rounded down: returns the index of the highest
// set bit of `num`.
// A zero argument has no logarithm; in that case "log0?" is printed to
// stderr and the process terminates with exit status 1.
uint32_t _log2(uint64_t num) {
  if (num == 0) {
    std::cerr << "log0?" << std::endl;
    exit(1);
  }

  // Count how many times the value can be halved before it reaches 1.
  uint32_t shifts = 0;
  for (uint64_t rest = num; rest > 1; rest >>= 1) {
    ++shifts;
  }
  return shifts;
}

// Returns true when `val` is a positive power of two (1, 2, 4, 8, ...).
// Zero and negative values are never powers of two.
bool is_pow2(int64_t val) {
  // A positive power of two has exactly one bit set, so clearing its lowest
  // set bit (val & (val - 1)) leaves zero. This gives the same answers as
  // comparing the integer log2 of val and val - 1, in constant time.
  return val > 0 && (val & (val - 1)) == 0;
}

// Integer exponentiation by repeated multiplication: returns base^n.
// For n <= 0 no multiplication takes place and the result is 1.
int powers(int base, int n) {
  int result = 1;
  for (int remaining = n; remaining > 0; --remaining) {
    result *= base;
  }
  return result;
}

/*----------------------------------------------------------------------*/

// Base-2 logarithm of a strictly positive real number, computed as the
// ratio of natural logarithms. The argument is checked with assert().
double logtwo(double x) {
  assert(x > 0);
  const double ln_x = log(x);
  const double ln_2 = log(2.0);
  return ln_x / ln_2;
}

/*----------------------------------------------------------------------*/

// Gate capacitance of a transistor of the given gate width.
//
// The device flavour is chosen from the flags, in this priority order:
//   DRAM cell access -> DRAM wordline -> SRAM cell -> sleep transistor ->
//   peripheral/global device (default).
// The result is the per-width gate capacitance (ideal + overlap + 3x fringe)
// times `width`, plus a poly contribution of l_phy * Cpolywire.
// `wirelength` is accepted for interface compatibility but is not used by
// the current formula.
double gate_C(double width, double wirelength, bool _is_dram, bool _is_cell,
              bool _is_wl_tr, bool _is_sleep_tx) {
  // Start from the default device and override it for the special cases.
  const TechnologyParameter::DeviceType *dt = &g_tp.peri_global;

  if (_is_dram) {
    if (_is_cell) {
      dt = &g_tp.dram_acc;  // DRAM cell access transistor
    }
    else if (_is_wl_tr) {
      dt = &g_tp.dram_wl;  // DRAM wordline transistor
    }
    else if (_is_sleep_tx) {
      dt = &g_tp.sleep_tx;  // Sleep transistor
    }
  }
  else {
    if (_is_cell) {
      dt = &g_tp.sram_cell;  // SRAM cell access transistor
    }
    else if (_is_sleep_tx) {
      dt = &g_tp.sleep_tx;  // Sleep transistor
    }
  }

  const double cap_per_unit_width =
      dt->C_g_ideal + dt->C_overlap + 3 * dt->C_fringe;
  return cap_per_unit_width * width + dt->l_phy * Cpolywire;
}

// Gate capacitance (in Farads) of a pass transistor.
// The computation is currently identical to gate_C(): the pass-gate case
// has no special treatment (v5.0).
//
// Device selection priority: DRAM cell access, DRAM wordline, SRAM cell,
// sleep transistor, then the peripheral/global device.
double gate_C_pass(
    double width,       // gate width in um (length is Lphy_periph_global)
    double wirelength,  // poly wire length going to gate in lambda (unused)
    bool _is_dram, bool _is_cell, bool _is_wl_tr, bool _is_sleep_tx) {
  const TechnologyParameter::DeviceType *dt =
      (_is_dram && _is_cell)      ? &g_tp.dram_acc   // DRAM cell access
      : (_is_dram && _is_wl_tr)   ? &g_tp.dram_wl    // DRAM wordline
      : (!_is_dram && _is_cell)   ? &g_tp.sram_cell  // SRAM cell access
      : _is_sleep_tx              ? &g_tp.sleep_tx   // Sleep transistor
                                  : &g_tp.peri_global;

  const double cap_per_unit_width =
      dt->C_g_ideal + dt->C_overlap + 3 * dt->C_fringe;
  const double poly_cap = dt->l_phy * Cpolywire;
  return cap_per_unit_width * width + poly_cap;
}

// Drain capacitance of a (possibly folded, possibly stacked) transistor.
//
//   width     - total transistor width before folding
//   nchannel  - non-zero for an n-channel device, zero for p-channel
//   stack     - number of series-stacked transistors
//   next_arg_thresh_folding_width_or_height_cell
//             - 0: fold_dimension is the folding-width threshold, i.e. the
//               width above which the transistor gets folded;
//               otherwise: fold_dimension is the height of the cell that
//               contains the transistor
//   fold_dimension - interpreted according to the previous argument
//   _is_dram/_is_cell/_is_wl_tr/_is_sleep_tx - select the device flavour
//               (DRAM cell access, DRAM wordline, SRAM cell, sleep
//               transistor, else peripheral/global)
//
// The result is the sum of junction area capacitance, junction sidewall
// capacitance, drain-to-gate (fringe + overlap) capacitance and, for folded
// devices, the capacitance of the local wire joining the fingers.
double drain_C_(double width, int nchannel, int stack,
                int next_arg_thresh_folding_width_or_height_cell,
                double fold_dimension, bool _is_dram, bool _is_cell,
                bool _is_wl_tr, bool _is_sleep_tx) {
  const TechnologyParameter::DeviceType *dt = &g_tp.peri_global;
  if (_is_dram) {
    if (_is_cell) {
      dt = &g_tp.dram_acc;  // DRAM cell access transistor
    }
    else if (_is_wl_tr) {
      dt = &g_tp.dram_wl;  // DRAM wordline transistor
    }
    else if (_is_sleep_tx) {
      dt = &g_tp.sleep_tx;  // Sleep transistor
    }
  }
  else if (_is_cell) {
    dt = &g_tp.sram_cell;  // SRAM cell access transistor
  }
  else if (_is_sleep_tx) {
    dt = &g_tp.sleep_tx;  // Sleep transistor
  }

  // Width of a single finger once the transistor has been folded.
  double finger_w;
  if (next_arg_thresh_folding_width_or_height_cell == 0) {
    // fold_dimension is directly the folding-width threshold
    finger_w = fold_dimension;
  }
  else {
    // fold_dimension is the cell height: remove both power rails and the
    // P/N diffusion gap, then split what is left between P and N devices.
    // TODO : w_folded_tr must come from Component::compute_gate_area()
    const double diff_height = (fold_dimension - 2 * g_tp.HPOWERRAIL) -
                               g_tp.MIN_GAP_BET_P_AND_N_DIFFS;
    const double ratio_p_to_n = 2.0 / (2.0 + 1.0);
    const double share = nchannel ? (1 - ratio_p_to_n) : ratio_p_to_n;
    finger_w = share * diff_height;
  }

  const int fingers = (int)(ceil(width / finger_w));
  const bool folded = fingers > 1;
  if (!folded) {
    finger_w = width;  // no folding: the single finger is the whole device
  }

  // Horizontal extent of the drain region.
  const double contact_w =
      g_tp.w_poly_contact + 2 * g_tp.spacing_poly_to_contact;  // drain only
  const double stack_gap = (stack - 1) * g_tp.spacing_poly_to_poly;
  double total_drain_w = contact_w + stack_gap;

  // Drain edge length that faces a gate.
  double gate_facing_h = finger_w + 2 * finger_w * (stack - 1);

  double metal_cap = 0;
  if (folded) {
    total_drain_w += (fingers - 2) * contact_w + (fingers - 1) * stack_gap;
    gate_facing_h *= fingers;
    // wire connecting the drains of the individual fingers
    metal_cap = g_tp.wire_local.C_per_um * total_drain_w;
  }

  // With an even number of fingers no finger-height sidewall is counted.
  const double sidewall_h = (folded && fingers % 2 == 0) ? 0 : finger_w;

  const double area_cap = dt->C_junc * total_drain_w * finger_w;
  const double sidewall_cap =
      dt->C_junc_sidewall * (sidewall_h + 2 * total_drain_w);
  const double gate_cap =
      (2 * dt->C_fringe + 2 * dt->C_overlap) * gate_facing_h;

  return (area_cap + sidewall_cap + gate_cap + metal_cap);
}

// On-resistance of a stack of `stack` series transistors of the given
// width: stack * R_on / width, where R_on is the n-channel (nchannel != 0)
// or p-channel on-resistance of the selected device flavour.
//
// Device selection priority: DRAM cell access, DRAM wordline, SRAM cell,
// sleep transistor, then the peripheral/global device.
double tr_R_on(double width, int nchannel, int stack, bool _is_dram,
               bool _is_cell, bool _is_wl_tr, bool _is_sleep_tx) {
  const TechnologyParameter::DeviceType *dt;

  if (_is_dram && (_is_cell || _is_wl_tr)) {
    // cell access takes precedence over wordline when both are set
    dt = _is_cell ? &g_tp.dram_acc : &g_tp.dram_wl;
  }
  else if (!_is_dram && _is_cell) {
    dt = &g_tp.sram_cell;  // SRAM cell access transistor
  }
  else {
    dt = _is_sleep_tx ? &g_tp.sleep_tx : &g_tp.peri_global;
  }

  double r_on_unit;
  if (nchannel) {
    r_on_unit = dt->R_nch_on;
  }
  else {
    r_on_unit = dt->R_pch_on;
  }
  return (stack * r_on_unit / width);
}

/* Inverse of the on-resistance calculation: given a target resistance,
 * return the transistor width that would exhibit it.  It is used in the
 * data wordline to estimate the wordline driver size. */

// returns width in um
// Device selection priority: DRAM cell access, DRAM wordline, SRAM cell,
// sleep transistor, then the peripheral/global device.
double R_to_w(double res, int nchannel, bool _is_dram, bool _is_cell,
              bool _is_wl_tr, bool _is_sleep_tx) {
  const TechnologyParameter::DeviceType *dt = &g_tp.peri_global;

  if (_is_dram) {
    if (_is_cell) {
      dt = &g_tp.dram_acc;  // DRAM cell access transistor
    }
    else if (_is_wl_tr) {
      dt = &g_tp.dram_wl;  // DRAM wordline transistor
    }
    else if (_is_sleep_tx) {
      dt = &g_tp.sleep_tx;  // Sleep transistor
    }
  }
  else if (_is_cell) {
    dt = &g_tp.sram_cell;  // SRAM cell access transistor
  }
  else if (_is_sleep_tx) {
    dt = &g_tp.sleep_tx;  // Sleep transistor
  }

  // n-channel or p-channel on-resistance of the selected device
  const double r_on_unit = nchannel ? dt->R_nch_on : dt->R_pch_on;
  return (r_on_unit / res);
}

// PMOS-to-NMOS sizing ratio for the requested transistor class, taken from
// the device's n_to_p_eff_curr_drv_ratio:
//   DRAM wordline transistor -> g_tp.dram_wl
//   sleep transistor         -> g_tp.sleep_tx
//   anything else (DRAM or SRAM) -> g_tp.peri_global
double pmos_to_nmos_sz_ratio(bool _is_dram, bool _is_wl_tr, bool _is_sleep_tx) {
  if (_is_dram && _is_wl_tr) {
    return g_tp.dram_wl.n_to_p_eff_curr_drv_ratio;
  }
  if (_is_sleep_tx) {
    return g_tp.sleep_tx.n_to_p_eff_curr_drv_ratio;
  }
  return g_tp.peri_global.n_to_p_eff_curr_drv_ratio;
}

// "Timing Models for MOS Circuits" by Mark Horowitz, 1984
double horowitz(double inputramptime,  // input rise time
                double tf,             // time constant of gate
                double vs1,            // threshold voltage1/Vdd
                double vs2,            // threshold voltage2/Vdd
                int rise)              // RISE if the input rises, else falls
{
  // Returns the gate delay for the given input ramp and switching
  // thresholds (both thresholds are expressed as fractions of Vdd).

  // Step input with a single threshold: the delay reduces to tf * |ln(vs1)|.
  if (inputramptime == 0 && vs1 == vs2) {
    const double ln_vs1 = log(vs1);
    return tf * (vs1 < 1 ? -ln_vs1 : ln_vs1);
  }

  // input ramp time normalised to the gate time constant
  const double a = inputramptime / tf;

  if (rise == RISE) {
    const double b = 0.5;
    const double ln_th = log(vs1);
    return tf * sqrt(ln_th * ln_th + 2 * a * b * (1.0 - vs1)) +
           tf * (ln_th - log(vs2));
  }

  // falling input: same expression on the complemented thresholds
  const double b = 0.4;
  const double ln_th = log(1.0 - vs1);
  return tf * sqrt(ln_th * ln_th + 2 * a * b * (vs1)) +
         tf * (ln_th - log(1.0 - vs2));
}

double cmos_Ileak(double nWidth, double pWidth, bool _is_dram, bool _is_cell,
                  bool _is_wl_tr, bool _is_sleep_tx) {
  TechnologyParameter::DeviceType *dt;

  if ((!_is_dram) && (_is_cell)) {  // SRAM cell access transistor
    dt = &(g_tp.sram_cell);
  }
  else if ((_is_dram) && (_is_wl_tr)) {  // DRAM wordline transistor
    dt = &(g_tp.dram_wl);
  }
  else if (_is_sleep_tx) {
    dt = &g_tp.sleep_tx;  // Sleep transistor
  }
  else {  // DRAM or SRAM all other transistors
    dt = &(g_tp.peri_global);
  }
  return nWidth * dt->I_off_n + pWidth * dt->I_off_p;
}

// Partial factorial: the product m * (m+1) * ... * n.
// With m == 1 this is the ordinary n!.
// When m > n nothing is multiplied and m itself is returned.
int factorial(int n, int m) {
  int product = m;
  int next = m + 1;
  while (next <= n) {
    product *= next;
    ++next;
  }
  return product;
}

// Binomial coefficient C(n, m): the number of ways to choose m items out
// of n. Returns 0 when m is outside [0, n].
//
// The result is built with the multiplicative formula
//   C(n, m) = prod_{k=1..m} (n - m + k) / k
// where every intermediate value is itself a binomial coefficient, so each
// division is exact. This replaces the earlier quotient of partial
// factorials, which returned n + 1 instead of 1 for C(n, n) and overflowed
// int for moderately large n.
// NOTE: callers that relied on the old C(n, n) value will see different
// (corrected) numbers.
int combination(int n, int m) {
  if (m < 0 || m > n) {
    return 0;
  }
  // C(n, m) == C(n, n - m); iterate over the smaller of the two.
  if (m > n - m) {
    m = n - m;
  }

  long long result = 1;
  for (int k = 1; k <= m; ++k) {
    result = result * (n - m + k) / k;
  }
  return static_cast<int>(result);
}

double simplified_nmos_Isat(double nwidth, bool _is_dram, bool _is_cell,
                            bool _is_wl_tr, bool _is_sleep_tx) {
  TechnologyParameter::DeviceType *dt;

  if ((!_is_dram) && (_is_cell)) {  // SRAM cell access transistor
    dt = &(g_tp.sram_cell);
  }
  else if ((_is_dram) && (_is_wl_tr)) {  // DRAM wordline transistor
    dt = &(g_tp.dram_wl);
  }
  else if (_is_sleep_tx) {
    dt = &g_tp.sleep_tx;  // Sleep transistor
  }
  else {  // DRAM or SRAM all other transistors
    dt = &(g_tp.peri_global);
  }
  return nwidth * dt->I_on_n;
}

double simplified_pmos_Isat(double pwidth, bool _is_dram, bool _is_cell,
                            bool _is_wl_tr, bool _is_sleep_tx) {
  TechnologyParameter::DeviceType *dt;

  if ((!_is_dram) && (_is_cell)) {  // SRAM cell access transistor
    dt = &(g_tp.sram_cell);
  }
  else if ((_is_dram) && (_is_wl_tr)) {  // DRAM wordline transistor
    dt = &(g_tp.dram_wl);
  }
  else if (_is_sleep_tx) {
    dt = &g_tp.sleep_tx;  // Sleep transistor
  }
  else {  // DRAM or SRAM all other transistors
    dt = &(g_tp.peri_global);
  }
  return pwidth * dt->I_on_n / dt->n_to_p_eff_curr_drv_ratio;
}

double simplified_nmos_leakage(double nwidth, bool _is_dram, bool _is_cell,
                               bool _is_wl_tr, bool _is_sleep_tx) {
  TechnologyParameter::DeviceType *dt;

  if ((!_is_dram) && (_is_cell)) {  // SRAM cell access transistor
    dt = &(g_tp.sram_cell);
  }
  else if ((_is_dram) && (_is_wl_tr)) {  // DRAM wordline transistor
    dt = &(g_tp.dram_wl);
  }
  else if (_is_sleep_tx) {
    dt = &g_tp.sleep_tx;  // Sleep transistor
  }
  else {  // DRAM or SRAM all other transistors
    dt = &(g_tp.peri_global);
  }
  return nwidth * dt->I_off_n;
}

double simplified_pmos_leakage(double pwidth, bool _is_dram, bool _is_cell,
                               bool _is_wl_tr, bool _is_sleep_tx) {
  TechnologyParameter::DeviceType *dt;

  if ((!_is_dram) && (_is_cell)) {  // SRAM cell access transistor
    dt = &(g_tp.sram_cell);
  }
  else if ((_is_dram) && (_is_wl_tr)) {  // DRAM wordline transistor
    dt = &(g_tp.dram_wl);
  }
  else if (_is_sleep_tx) {
    dt = &g_tp.sleep_tx;  // Sleep transistor
  }
  else {  // DRAM or SRAM all other transistors
    dt = &(g_tp.peri_global);
  }
  return pwidth * dt->I_off_p;
}

double cmos_Ig_n(double nWidth, bool _is_dram, bool _is_cell, bool _is_wl_tr,
                 bool _is_sleep_tx) {
  TechnologyParameter::DeviceType *dt;

  if ((!_is_dram) && (_is_cell)) {  // SRAM cell access transistor
    dt = &(g_tp.sram_cell);
  }
  else if ((_is_dram) && (_is_wl_tr)) {  // DRAM wordline transistor
    dt = &(g_tp.dram_wl);
  }
  else if (_is_sleep_tx) {
    dt = &g_tp.sleep_tx;  // Sleep transistor
  }
  else {  // DRAM or SRAM all other transistors
    dt = &(g_tp.peri_global);
  }
  return nWidth * dt->I_g_on_n;
}

double cmos_Ig_p(double pWidth, bool _is_dram, bool _is_cell, bool _is_wl_tr,
                 bool _is_sleep_tx) {
  TechnologyParameter::DeviceType *dt;

  if ((!_is_dram) && (_is_cell)) {  // SRAM cell access transistor
    dt = &(g_tp.sram_cell);
  }
  else if ((_is_dram) && (_is_wl_tr)) {  // DRAM wordline transistor
    dt = &(g_tp.dram_wl);
  }
  else if (_is_sleep_tx) {
    dt = &g_tp.sleep_tx;  // Sleep transistor
  }
  else {  // DRAM or SRAM all other transistors
    dt = &(g_tp.peri_global);
  }
  return pWidth * dt->I_g_on_p;
}

// Adds to `acc` the subthreshold leakage of a series stack of `fanin`
// transistors, summed over every input state with at least one transistor
// off. A state with k transistors off contributes
// leak_per_tx * UNI_LEAK_STACK_FACTOR^(k-1), and there are C(fanin, k) such
// states. (With zero transistors off there is no leakage.)
// The running total is threaded through `acc` so that terms are summed in
// the same order as an in-place accumulation.
static double accumulate_series_stack_leak(double acc, double leak_per_tx,
                                           int fanin) {
  for (int off_count = 1; off_count <= fanin; ++off_count) {
    acc += leak_per_tx * pow(UNI_LEAK_STACK_FACTOR, (off_count - 1)) *
           combination(fanin, off_count);
  }
  return acc;
}

// Subthreshold leakage current of a gate, averaged over its input states.
//
//   nWidth, pWidth - NMOS / PMOS transistor widths
//   fanin          - number of inputs (must be >= 1)
//   g_type         - gate kind: nmos, pmos, inv, nand, nor, tri or tg
//   _is_dram/_is_cell/_is_wl_tr/_is_sleep_tx - device flavour selection,
//                    forwarded to simplified_{n,p}mos_leakage()
//   topo           - for nmos/pmos networks: parallel or series (stacked)
//
// Any other g_type trips assert(0).
double cmos_Isub_leakage(double nWidth, double pWidth, int fanin,
                         enum Gate_type g_type, bool _is_dram, bool _is_cell,
                         bool _is_wl_tr, bool _is_sleep_tx,
                         enum Half_net_topology topo) {
  assert(fanin >= 1);
  const double nmos_leak = simplified_nmos_leakage(nWidth, _is_dram, _is_cell,
                                                   _is_wl_tr, _is_sleep_tx);
  const double pmos_leak = simplified_pmos_leakage(pWidth, _is_dram, _is_cell,
                                                   _is_wl_tr, _is_sleep_tx);
  // number of distinct input combinations
  const int num_states = int(pow(2.0, fanin));
  double Isub = 0;

  switch (g_type) {
    case nmos:
    case pmos: {
      // A bare pull-down or pull-up network; only the leakage value differs.
      const double leak = (g_type == nmos) ? nmos_leak : pmos_leak;
      if (fanin == 1) {
        Isub = leak / num_states;
      }
      else if (topo == parallel) {
        // only when all tx are off, leakage power is non-zero.
        // The possibility of this state is 1/num_states
        Isub = leak * fanin / num_states;
      }
      else {
        Isub = accumulate_series_stack_leak(Isub, leak, fanin);
        Isub /= num_states;
      }
      break;
    }
    case inv:
    case tg:
      Isub = (nmos_leak + pmos_leak) / 2;
      break;
    case nand:
      Isub += fanin * pmos_leak;  // the pullup network
      // the pulldown network is a series stack of NMOS
      Isub = accumulate_series_stack_leak(Isub, nmos_leak, fanin);
      Isub /= num_states;
      break;
    case nor:
      // the pullup network is a series stack of PMOS
      Isub = accumulate_series_stack_leak(Isub, pmos_leak, fanin);
      Isub += fanin * nmos_leak;  // the pulldown network
      Isub /= num_states;
      break;
    case tri:
      Isub += (nmos_leak + pmos_leak) / 2;  // enabled
      // disabled: upper bound of leakage power
      Isub += nmos_leak * UNI_LEAK_STACK_FACTOR;
      Isub /= 2;
      break;
    default:
      assert(0);
      break;
  }

  return Isub;
}

// Adds to `acc` the gate leakage of `fanin` parallel transistors, summed
// over every input state: a state with k transistors on contributes
// k * leak_per_tx, and there are C(fanin, k) such states.
// The caller is responsible for dividing by the number of states.
static double add_parallel_gate_leak(double acc, double leak_per_tx,
                                     int fanin) {
  for (int on_count = 1; on_count <= fanin; ++on_count) {
    acc += leak_per_tx * combination(fanin, on_count) * on_count;
  }
  return acc;
}

// Adds to `acc` the gate leakage of a series stack of `fanin` transistors,
// summed over every input state. The all-on state contributes
// fanin * leak_per_tx; states with 1..fanin-1 transistors on are
// approximated as half of the parallel contribution.
// TODO: this is an approximation now, a precise computation will be very
// complicated.
// The caller is responsible for dividing by the number of states.
static double add_series_gate_leak(double acc, double leak_per_tx, int fanin) {
  acc += leak_per_tx * fanin;  // all TXs are on
  for (int on_count = 1; on_count < fanin; ++on_count) {
    acc += leak_per_tx * combination(fanin, on_count) * on_count / 2;
  }
  return acc;
}

// Gate leakage current of a gate, averaged over its input states.
//
//   nWidth, pWidth - NMOS / PMOS transistor widths
//   fanin          - number of inputs (must be >= 1)
//   g_type         - gate kind: nmos, pmos, inv, nand, nor, tri or tg
//   _is_dram/_is_cell/_is_wl_tr/_is_sleep_tx - device flavour selection,
//                    forwarded to cmos_Ig_n() / cmos_Ig_p()
//   topo           - for nmos/pmos networks: parallel or series (stacked)
//
// Any other g_type trips assert(0).
//
// Fix: the nmos/pmos parallel branch used to return the sum over all input
// states without dividing by num_states, unlike the fanin == 1, series,
// nand and nor branches. It is now normalised the same way, so the result
// is an average per state in every branch.
double cmos_Ig_leakage(double nWidth, double pWidth, int fanin,
                       enum Gate_type g_type, bool _is_dram, bool _is_cell,
                       bool _is_wl_tr, bool _is_sleep_tx,
                       enum Half_net_topology topo) {
  assert(fanin >= 1);
  const double nmos_leak =
      cmos_Ig_n(nWidth, _is_dram, _is_cell, _is_wl_tr, _is_sleep_tx);
  const double pmos_leak =
      cmos_Ig_p(pWidth, _is_dram, _is_cell, _is_wl_tr, _is_sleep_tx);
  // number of distinct input combinations
  const int num_states = int(pow(2.0, fanin));
  double Ig_on = 0;

  switch (g_type) {
    case nmos:
    case pmos: {
      // A bare pull-down or pull-up network; only the leakage value differs.
      const double leak = (g_type == nmos) ? nmos_leak : pmos_leak;
      if (fanin == 1) {
        Ig_on = leak / num_states;
      }
      else {
        if (topo == parallel) {
          Ig_on = add_parallel_gate_leak(Ig_on, leak, fanin);
        }
        else {
          Ig_on = add_series_gate_leak(Ig_on, leak, fanin);
        }
        Ig_on /= num_states;  // average over all input states
      }
      break;
    }
    case inv:
    case tg:
      Ig_on = (nmos_leak + pmos_leak) / 2;
      break;
    case nand:
      // pull up network: parallel PMOS
      Ig_on = add_parallel_gate_leak(Ig_on, pmos_leak, fanin);
      // pull down network: series NMOS
      Ig_on = add_series_gate_leak(Ig_on, nmos_leak, fanin);
      Ig_on /= num_states;
      break;
    case nor:
      // pull up network: series PMOS
      Ig_on = add_series_gate_leak(Ig_on, pmos_leak, fanin);
      // pull down network: parallel NMOS
      Ig_on = add_parallel_gate_leak(Ig_on, nmos_leak, fanin);
      Ig_on /= num_states;
      break;
    case tri:
      Ig_on += (2 * nmos_leak + 2 * pmos_leak) / 2;  // enabled
      // disabled: upper bound of leakage power
      Ig_on += (nmos_leak + pmos_leak) / 2;
      Ig_on /= 2;
      break;
    default:
      assert(0);
      break;
  }

  return Ig_on;
}

// Simplified short-circuit estimate for a CMOS gate. Despite the "p_"
// naming of the intermediate values, the quantity returned is an energy.
//
//   vt             - threshold voltage
//   velocity_index - velocity saturation index of the device
//   c_in, c_out    - input and output (load) capacitance; fanout = c_out/c_in
//   w_nmos, w_pmos - transistor widths (not used by this simplified model)
//   i_on_n, i_on_p - on-currents of this gate's NMOS / PMOS
//   i_on_n_in, i_on_p_in - on-currents of the driving gate's NMOS / PMOS
//   vdd            - supply voltage
//
// Only the low-fanout branch of the model is evaluated; the high-fanout
// branch and the harmonic-mean blend of the two are disabled because the
// harmonic mean cannot be applied to these simple formulas.
//
// Fix: the leading coefficient was written as `10 / 3`, which is integer
// division and evaluates to 3. It is now the intended 10.0 / 3.0.
//
// NOTE(review): the cubed term subtracts the dimensionless vt/vdd ratio from
// the voltage (vdd - vt); an earlier variant used 0.5 - vt/vdd. Left as is -
// confirm against the model derivation.
double shortcircuit_simple(double vt, double velocity_index, double c_in,
                           double c_out, double w_nmos, double w_pmos,
                           double i_on_n, double i_on_p, double i_on_n_in,
                           double i_on_p_in, double vdd) {
  const double fo_n = i_on_n / i_on_n_in;
  const double fo_p = i_on_p / i_on_p_in;
  const double fanout = c_out / c_in;
  const double beta_ratio = i_on_p / i_on_n;
  const double vt_to_vdd_ratio = vt / vdd;

  // Factor shared by the charge and discharge expressions.
  const double common =
      10.0 / 3.0 *
      (pow(((vdd - vt) - vt_to_vdd_ratio), 3.0) / pow(velocity_index, 2.0) /
       pow(2.0, 3 * vt_to_vdd_ratio * vt_to_vdd_ratio)) *
      c_in * vdd * vdd;

  const double p_short_circuit_discharge =
      common * fo_p * fo_p / fanout / beta_ratio;
  const double p_short_circuit_charge =
      common * fo_n * fo_n / fanout * beta_ratio;

  // Average of the charging and discharging transitions.
  return (p_short_circuit_discharge + p_short_circuit_charge) / 2;
}

// Detailed short-circuit model - currently disabled.
//
// Every argument is ignored and the function always returns 0.
//
// For reference, the disabled model (an energy, despite the p_ naming) was:
//   fo_n = i_on_n / i_on_n_in;  fo_p = i_on_p / i_on_p_in;
//   fanout = 1;  beta_ratio = i_on_p / i_on_n;  e = 2.71828;
//   alpha = velocity_index
//   f_alpha   = 1 / (alpha + 2) - alpha / (2 * (alpha + 3)) +
//               alpha / (alpha + 4) * (alpha / 2 - 1);
//   k_v       = 0.9 / 0.8 + (vdd - vt) / 0.8 * log(10 * (vdd - vt) / e);
//   g_v_alpha = (alpha + 1) * pow(1 - alpha, alpha) *
//               pow(1 - alpha, alpha / 2) / f_alpha /
//               pow(1 - alpha - alpha, alpha / 2 + alpha + 2);
//   h_v_alpha = pow(2, alpha) * (alpha + 1) * pow(1 - alpha, alpha) /
//               pow(1 - alpha - alpha, alpha + 1);
//   p_short_circuit_discharge =
//       k_v * vdd * vdd * c_in * fo_p * fo_p /
//       ((vdd - vt) * g_v_alpha * fanout * beta_ratio / 2 / k_v +
//        h_v_alpha * fo_p);
double shortcircuit(double vt, double velocity_index, double c_in, double c_out,
                    double w_nmos, double w_pmos, double i_on_n, double i_on_p,
                    double i_on_n_in, double i_on_p_in, double vdd) {
  const double disabled_result = 0;
  return disabled_result;
}
